//*********************************************************************//
// 
//
// Author:  Polynomial Approximation
//
// Date:    02 Dec 2015
// 
//*********************************************************************//
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <time.h>

#include "PolynomialApproxAppCpuCode.h"
#include "nrutil.h"

#include "MaxSLiCInterface.h"
#include "Maxfiles.h"

#define MAX_CASES (7000L)
#define MAX_DEGREE (100L)

/*
 * Rounds `value` up to the nearest multiple of `devider`.
 *
 * value   - number to round up.
 * devider - required divisor of the result.
 *
 * Returns `value` itself when it is already a multiple of `devider`,
 * otherwise the next larger multiple. A `devider` of 0 has no multiples to
 * round to, so `value` is returned unchanged instead of dividing by zero.
 * No overflow check is performed: the addition wraps if the next multiple
 * does not fit in 64 bits.
 */
uint64_t makeItDevidable(uint64_t value, uint64_t devider) {
	if (devider == 0) {
		return value;
	}

	const uint64_t remainder = value % devider;
	if (remainder != 0) {
		value += devider - remainder;
	}
	return value;
}

/*
 * Reads polynomial test cases from a fixed input file, pushes them through
 * PolynomialApproxApp() in chunks of at most MAX_CASES cases, computes a CPU
 * reference for every case with chebpc(), prints mismatches to stdout and
 * writes the CPU results to a fixed output file.
 *
 * Input format (as consumed below): a case count, then for every case a
 * coefficient count followed by that many floating point coefficients.
 *
 * Returns 0 on success and 1 on any I/O, allocation or input-validation
 * failure. All resources are released on every path.
 */
int main(void) {
	int status = 1;
	FILE *in_data = NULL;
	FILE *out_data = NULL;

	/* Per-case buffers live on the heap: two MAX_CASES x MAX_DEGREE double
	 * tables are far too large for a typical default stack. */
	double (*data)[MAX_DEGREE] = NULL;
	double (*result)[MAX_DEGREE] = NULL;
	uint64_t *degree = NULL;

	/* Stream buffers exchanged with PolynomialApproxApp(). */
	double *c_in = NULL;
	double *d_out = NULL;
	uint64_t *cnt_out = NULL;
	uint8_t *i_in = NULL;
	uint8_t *empty_in = NULL; /* 1 = slot of c_in already used in this chunk */

	uint64_t size = 0;
	uint64_t chunk = 0;
	uint64_t outerLoop1 = 0;
	uint64_t streamSize = 0;
	uint64_t free_pos = 0;
	clock_t run_time = 0;
	clock_t start = 0;

	in_data = fopen(
			"/home/demo/projects/PolynomialApproximation/CPUCode/cheb_1k.txt",
			"r");
	if (NULL == in_data) {
		fprintf(stderr, "Input file not founded.\n");
		goto cleanup;
	}

	out_data
			= fopen(
					"/home/demo/projects/PolynomialApproximation/CPUCode/cheb_1k_result.txt",
					"w");
	if (NULL == out_data) {
		fprintf(stderr, "Output file not created.\n");
		goto cleanup;
	}

	if (fscanf(in_data, "%" SCNu64, &size) != 1) {
		fprintf(stderr, "Cannot read the number of cases.\n");
		goto cleanup;
	}

	outerLoop1 = PolynomialApproxApp_get_PolynomialApproxAppKernel_outerLoop1();
	if (outerLoop1 == 0) {
		/* outerLoop1 is the stride between consecutive coefficients of one
		 * case; a zero stride would put every coefficient in the same slot. */
		fprintf(stderr, "Invalid outerLoop1 value (0).\n");
		goto cleanup;
	}
	streamSize = makeItDevidable(MAX_CASES * MAX_DEGREE,
			outerLoop1 * PolynomialApproxApp_PCIE_ALIGNMENT);

	data = calloc(MAX_CASES, sizeof *data);
	result = calloc(MAX_CASES, sizeof *result);
	degree = calloc(MAX_CASES, sizeof *degree);
	c_in = calloc(streamSize, sizeof *c_in);
	d_out = calloc(streamSize, sizeof *d_out);
	cnt_out = calloc(streamSize, sizeof *cnt_out);
	i_in = malloc(streamSize * sizeof *i_in);
	empty_in = malloc(streamSize * sizeof *empty_in);
	if (!data || !result || !degree || !c_in || !d_out || !cnt_out || !i_in
			|| !empty_in) {
		fprintf(stderr, "Out of memory.\n");
		goto cleanup;
	}

	while (size > 0) {
		chunk = (size > MAX_CASES) ? (uint64_t) MAX_CASES : size;
		size -= chunk;

		/* Start every chunk from a clean layout; otherwise markers left by
		 * the previous chunk would corrupt the placement below. */
		memset(c_in, 0, streamSize * sizeof *c_in);
		memset(i_in, 1, streamSize * sizeof *i_in);
		memset(empty_in, 0, streamSize * sizeof *empty_in);
		free_pos = 0;

		for (uint64_t i = 0; i < chunk; i++) {
			if (fscanf(in_data, "%" SCNu64, &degree[i]) != 1) {
				fprintf(stderr, "Cannot read the degree of a case.\n");
				goto cleanup;
			}
			if (degree[i] > MAX_DEGREE) {
				fprintf(stderr, "Degree %" PRIu64 " exceeds the limit of %ld.\n",
						degree[i], MAX_DEGREE);
				goto cleanup;
			}
			for (uint64_t j = 0; j < degree[i]; j++) {
				if (fscanf(in_data, "%lf", &data[i][j]) != 1) {
					fprintf(stderr, "Cannot read a coefficient.\n");
					goto cleanup;
				}
			}

			/* The case occupies slots free_pos, free_pos + outerLoop1, ...;
			 * make sure the last one is still inside the stream buffers. */
			if (degree[i] > 0
					&& (free_pos >= streamSize
							|| degree[i] - 1
									> (streamSize - 1 - free_pos) / outerLoop1)) {
				fprintf(stderr, "Input does not fit into the stream buffer.\n");
				goto cleanup;
			}

			/* Coefficients are stored highest index first, outerLoop1 apart. */
			uint64_t k = 0;
			for (uint64_t remaining = degree[i]; remaining > 0;
					remaining--, k += outerLoop1) {
				c_in[free_pos + k] = data[i][remaining - 1];
				empty_in[free_pos + k] = 1;
			}
			/* The last slot written for this case gets i_in == 0; every other
			 * slot keeps the default 1. */
			if (k > 0) {
				i_in[free_pos + k - outerLoop1] = 0;
			}

			/* Next case starts at the first slot that is still unused. */
			while (free_pos < streamSize && empty_in[free_pos] != 0) {
				free_pos++;
			}
		}

		/* Stream length = one past the last used slot, rounded up below. */
		free_pos = streamSize - 1;
		while (free_pos > 0 && !empty_in[free_pos]) {
			free_pos--;
		}
		free_pos++;

		start = clock();
		uint64_t streamLength = makeItDevidable(free_pos, PolynomialApproxApp_PCIE_ALIGNMENT);
		PolynomialApproxApp(chunk, streamLength, c_in, i_in, cnt_out, d_out);
		run_time += clock() - start;

		/* CPU reference for every case of the chunk. */
		for (uint64_t i = 0; i < chunk; i++) {
			chebpc(data[i], result[i], degree[i]);
		}

		for (uint64_t i = 0; i < chunk; i++) {
			/* cnt_out[i] selects the reference row compared with output slot
			 * i - presumably the case index reported by the kernel; confirm
			 * against the kernel. It is only used when it is a valid row. */
			const uint64_t ref_row = cnt_out[i];
			const int ref_row_valid = ref_row < (uint64_t) MAX_CASES;
			if (!ref_row_valid) {
				fprintf(stderr, "Output slot %" PRIu64 " refers to invalid case %" PRIu64 ".\n",
						i, ref_row);
			}

			for (uint64_t j = 0; j < degree[i]; j++) {
				const double hw_value = d_out[i * MAX_DEGREE + j];
				if (ref_row_valid && result[ref_row][j] != hw_value) {
					/* Report the value that was actually compared. */
					printf("Razlika: %d %d %.4f\n", (int) ref_row, (int) j, hw_value);
				}
				fprintf(out_data, "%.4f ", result[i][j]);
			}
			fprintf(out_data, "\n");
		}
	}

	/* fclose() flushes buffered output, so a failure here means lost data. */
	if (fclose(out_data) != 0) {
		out_data = NULL;
		fprintf(stderr, "Output file not written completely.\n");
		goto cleanup;
	}
	out_data = NULL;
	fclose(in_data);
	in_data = NULL;

	printf("Run time %.2f.\n", (double) run_time / CLOCKS_PER_SEC);
	status = 0;

cleanup:
	if (out_data != NULL) {
		fclose(out_data);
	}
	if (in_data != NULL) {
		fclose(in_data);
	}

	free(data);
	free(result);
	free(degree);
	free(c_in);
	free(d_out);
	free(cnt_out);
	free(i_in);
	free(empty_in);

	return status;
}
